import os

import pandas as pd
'''
实例演示：

将一个大Excel等份拆成多个Excel
将多个小Excel合并成一个大Excel并标记来源
'''

def chai_excel():
    work_dir="../course_datas/c15_excel_split_merge"
    splits_dir=f"{work_dir}/splits2"
    if not os.path.exists(splits_dir):
        os.mkdir(splits_dir)
    # 0、读取源Excel到Pandas¶
    df_source = pd.read_excel(f"{work_dir}/crazyant_blog_articles_source.xlsx")
    # print(df_source.head())
    # print(df_source.shape)
    # print(df_source.index)

    # 这个大excel，会拆分给这几个人
    user_names = ["xiao_shuai", "xiao_wang", "xiao_ming", "xiao_lei", "xiao_bo", "xiao_hong"]
    total_row_count = df_source.shape[0]
    split_size = total_row_count // len(user_names)
    if total_row_count % len(user_names) != 0:
        split_size += 1
    # 2、拆分成多个dataframe
    df_subs = []
    for index,user_name in enumerate(user_names):
        # iloc的开始索引
        begin = index*split_size
        # iloc的结束索引
        end = begin+split_size
        # 实现df按照iloc拆分
        df_sub = df_source.iloc[begin:end]
        # 将每个子df存入列表
        df_subs.append((index, user_name, df_sub))

    # 3、将每个datafame存入excel
    for index,user_name,df_sub in df_subs:
        filename = f"{splits_dir}/crazyant_blog_articles_{index}_{user_name}.xlsx"
        df_sub.to_excel(filename,index=False)

# 二、合并多个小Excel到一个大Excel
# 遍历文件夹，得到要合并的Excel文件列表
# 分别读取到dataframe，给每个df添加一列用于标记来源
# 使用pd.concat进行df批量合并
# 将合并后的dataframe输出到excel
def merge_excel():
    work_dir="../course_datas/c15_excel_split_merge"
    splits_dir=f"{work_dir}/splits2"
    excel_list = []
    df_list = []
    for excel_name in os.listdir(splits_dir):
        excel_list.append(excel_name)
    #读取 excel
    for name in excel_list:
        filepath = splits_dir+"/"+name
        df_split = pd.read_excel(filepath)
        username = name.replace("crazyant_blog_articles_", "").replace(".xlsx", "")[2:]
        df_split['username'] = username
        # print(username)
        df_list.append(df_split)

    # 3. 使用pd.concat进行合并
    df_merge = pd.concat(df_list)
    # print(df_merge['username'].value_counts())
    # print(df_merge)
    # 4. 将合并后的dataframe输出到excel
    df_merge.to_excel("../newFile/crazyant_blog_articles_merge.xlsx",index=False)


if __name__ == "__main__":
    # chai_excel()
    merge_excel()



